Installation
Follow the steps below to install ScReNI package from GitHub and run it:
install.packages('devtools')
devtools::install_github('Xuxl2020/ScReNI')
# require
library(ScReNI)
# load requried packages
library(Seurat) # remotes::install_version("Seurat", "5.0.1", repos = c("https://satijalab.r-universe.dev", getOption("repos")))
library(SeuratObject) # remotes::install_version("SeuratObject", "5.0.1", repos = c("https://satijalab.r-universe.dev", getOption("repos")))
library(Signac) # 1.10.0
library(dplyr)
library(ggpubr)
library(monocle3)
library(ggplot2)
library(igraph)
library(Matrix) ###remotes::install_version("Matrix", version = "1.6-5")
library(harmony) ###remotes::install_version("harmony", version = "0.1.1")
library(sctransform) ###remotes::install_version("sctransform", version = "0.4.1")
library(GenomeInfoDb)
library(EnsDb.Mmusculus.v79)
library(ComplexHeatmap)
library(IReNA)
library(patchwork)
library(future)
library(cowplot)
library(circlize)
Introduction
ScReNI infers the regulatory network for each cell through integrating unpaired scRNA-seq and scATAC-seq data. This comprehensive tutorial is structured into five steps: (1) Clustering Integration; (2) Inference of Single Cell-Specific Networks; (3) Evaluation of Regulatory Networks; (4) Cell Clustering Based on Networks; (5) Identification of Enriched Regulators.
Input Data
Seurat Object
The Seurat object is expected to encapsulate cell type information within the ‘active.ident’ slot. This slot is pivotal as it designates the identity of each cell within the dataset. For those seeking to test the functionality, example datasets can be obtained from in the ‘data’ folder.
Please ensure that the Seurat object is properly formatted and includes all necessary metadata for accurate analysis.
# Define data information
data.name = 'mmRetina_RPCMG'
species = 'mouse'
data.type = 'unpaired'
# Load scRNA-seq and scATAC-seq data
scRNAseq <- readRDS(paste0(path, data.name, "_scRNAseq.rds"))
scATACseq <- readRDS(paste0(path, data.name, "_scATACseq.rds"))
Step 1: perform clustering to integrate scRNA-seq and scATAC-seq data
In this Step, we’ll demonstrate how to jointly analyze a single-cell dataset measuring both DNA accessibility and gene expression in the same cells using Signac and Seurat. For more detailed content and introduction, please refer to the website (https://stuartlab.org/signac/articles/pbmc_multiomic)
# Step 1.1: integrate scRNA-seq and scATAC-seq
# Define integration parameters
IntegratedDimensions <- 20
KNN <- 20
AnchorsDim <- 50
# for unpaired scRNA-seq and scATAC-seq data
coembed <- Integrate_scRNA_scATAC(scRNAseq, scATACseq, AnchorsDim, IntegratedDimensions, KNN, data.type='unpaired')
# for paired scRNA-seq and scATAC-seq data
# coembed <- Integrate_scRNA_scATAC(scRNAseq, scATACseq, IntegratedDimensions, KNN, data.type='paired', species)
# Step 1.2: plot figures for the integrated scRNAseq and scATACseq
coembed <- readRDS('data/mmRetina_RPCMG_integrated_scRNA_scATAC_D30.rds')
groups=c('datatypes', 'samples', 'celltypes')
print(DimPlot(coembed, group.by = groups[1], reduction = "umap", pt.size = 0.8, label = F)
| DimPlot(coembed, group.by = groups[2], reduction = "umap", pt.size = 0.8, label = F)
| DimPlot(coembed, group.by = groups[3], reduction = "umap", pt.size = 0.8, label = T))
# Step 1.3 only for unpaired data: get the nearest neighbor of each cell. However, for paired data, step 1.3 is not necessary. Instead, you can proceed directly to step 2 of the analysis pipeline.
scRNA_scATAC_neighbor <- Get_scRNA_scATAC_neighbors(coembed)
scRNA_scATAC_neighbor_dup <- scRNA_scATAC_neighbor[[1]]
scRNA_scATAC_neighbor_undup <- scRNA_scATAC_neighbor[[2]]
Step 2: infer single cell-specific networks
We reconstructed the single-cell networks using CSN, LIONESS, kScReNI, and CeSpGRN. These tools infer cell-specific regulatory networks using transcriptomes alone. wScReNI stands out as the primary analytical tool for integrating scRNA-seq and scATAC-seq data. In our analysis, we focused on the top 500 highly variable genes and the top 10,000 highly variable peaks for feature selection. To reconstruct the cell-specific networks, we randomly selected 100 cells from each of three distinct types of retinal progenitor cells (RPCs) and one type of Müller glial cells (MGs).Researchers have the flexibility to either utilize the entire population of cells for a comprehensive analysis or select a subset of cells to infer the single-cell regulatory networks. This flexibility allows for tailored analyses depending on the specific scientific questions and the computational resources available.
# set the parameters
cell.num = 100 # number of each cell type
ngenes = 500 # number of highly variable genes
npeaks = 10000 # number of highly variable peaks
result.path = 'results/'
network.path = paste0(result.path, 'NetworksC', cell.num, '/') # the path to save networks
# Step 2.1: get partial cells to infer scNetworks
# partial cells
Partial_cell <- c("RPCs_S1", "RPCs_S2", "RPCs_S3", "MG")
# for unpaired data
scRNA.scATAC.cells <- Select_partial_cells_for_scNewtorks(coembed, Partial_cell, cell.num, data.type = "unpaired", scRNA_scATAC_neighbor_undup)
# for paired data
# scRNA.scATAC.cells <- Select_partial_cells_for_scNewtorks(coembed, Partial_cell, cell.num, data.type = "paired")
In our approach to applying network algorithms for inferring regulatory networks, we utilize count data as the primary input. However, the choice of data can be adapted based on the particularities of the research question at hand. If necessary, you have the option to opt for processed data that has undergone various computational transformations. Such transformations might include normalization, variance stabilization, or other forms of data preprocessing.
# Step 2.2: select top variable genes and peaks to infer scNetworks
sub.scrna.top <- select_features(sub.scrna, ngenes, datatype='RNA')
sub.scatac.top <- select_features(sub.scatac, npeaks, datatype='ATAC')
sub.scrna.top <- sub.scrna.top$counts
sub.scatac.top <- sub.scatac.top$counts
The implementation of CSN, LIONESS, and kScReNI is primarily done through R code. However, for CeSpGRN, the process requires Python code, which necessitates running Python scripts within a Python environment. The specific Python scripts you need are located in the ‘mmRetina_RPCMG_CeSpGRN_Python.txt’ file within the R folder. This file contains the Python code required for your analysis. For a deeper understanding of the algorithm and to access additional resources, you are encouraged to visit Zhang’s Lab, the originators of this methodology (https://github.com/PeterZZQ/CeSpGRN). Their expertise and comprehensive materials will provide further insights and guidance.
# Step 2.3: infer scNetworks using CSN
CSN <- Infer_CSN_scNetworks(sub.scrna.top)
# Step 2.4: infer scNetworks using LIONESS
LIONESS.weights <- Infer_LIONESS_scNetworks(sub.scrna.top)
# Step 2.5: infer scNetworks using kScReNI
kScReNI.weights <- Infer_kScReNI_scNetworks(sub.scrna.top, ngenes, knn=KNN)
# Step 2.6: infer scNetworks using CeSpGRN
print("Execute the Python code in ‘R/mmRetina_RPCMG_CeSpGRN_Python.txt")
When utilizing the wScReNI approach, we rely on reference datasets that are made available in the ‘refer’ folder. These datasets are indispensable for elucidating the connections between genes and peaks, which is a fundamental step in the regulatory network construction process. However, if you have prior knowledge of the specific relationships between genes and peaks, you have the flexibility to bypass the step of identifying these associations. In such cases, you can proceed directly to Step 2.7.2 of the network construction phase, where you will integrate this known information into the framework of the regulatory network. This option allows for a more streamlined workflow, particularly when the foundational relationships are already established, enabling researchers to focus on the higher-level analysis of network dynamics and regulatory mechanisms.
# Step 2.7: infer scNetworks using wScReNI
# Step 2.7.1: identification of peak-associated genes
# reference datasets for identification of peak-associated genes
library(BSgenome.Mmusculus.UCSC.mm10) ### 1.4.3
genome_database = BSgenome.Mmusculus.UCSC.mm10
motif_database <- read.table("refer/Tranfac201803_Mm_MotifTFsFinal.txt", header = T)
gtf_data <- rtracklayer::import("refer/mouse.genes.gtf")
gtf_data <- as.data.frame(gtf_data)
motif_pwm <- readRDS("refer/all_motif_pwm.rds")
Identification of peak-associated genes
gene_peak_overlap <- Infer_gene_peak_relationships(gtf_data, sub.scrna.top, sub.scatac.top,
motif_database, motif_pwm, genome_database = genome_database)
gene_peak_overlap_matrix <- peakMat(sub.scatac.top, gene_peak_overlap)
gene_peak_overlap_labs <- peak_gene_TF_labs(gene_peak_overlap)
# Step 2.7.2: get the nearest neighbor of each cell
scRNA_scATAC_WNN <- Integrate_scRNA_scATAC(sub.scrna.top, sub.scatac.top, IntegratedDimensions=20, KNN, data.type='paired')
nearest.neighbors.idx <- scRNA_scATAC_WNN[['weighted.nn']]@nn.idx
rownames(nearest.neighbors.idx) <- scRNA_scATAC_WNN[['weighted.nn']]@cell.names
ncell = 400
# Step 2.7.3: infer scNetworks using wScReNI
wScReNI.weights <- Infer_wScReNI_scNetworks(sub.scrna.top, gene_peak_overlap_matrix, gene_peak_overlap_labs, nearest.neighbors.idx, network.path, data.name, cell.index=1:ncell)
wScReNI.weights <- Combine_wScReNI_scNetworks(sub.scrna.top, network.path)
Step 3: evaluation of the precision and recall on regulatory networks
ChIP-atlas was used to assess the inferred regulatory relationships. This dataset serves as a valuable reference for evaluating the performance of different network inference methods. Precision and recall serve as metrics for evaluating the performance of different single-cell regulatory network inference methods.
# Read the relationships of gene ID and gene symbol based on species
Gene_gtf_information <- 'gtf_region.10X_Mouse_ref'
ChIP_atlas <- 'mmp9.TSV.5kb_TF_target.df'
gtf_regions <- read.table(paste0('refer/', Gene_gtf_information, ".txt"), sep = "\t", header = T)
gene_id_gene_name_pair <- Deal_gene_information(gtf_regions)
# Read TF-target pairs from ChIP Atlas
TF_target <-read.table(paste0('refer/', ChIP_atlas, ".txt"), sep="\t")
TF_target_pair <- unique(paste(TF_target[, 1], TF_target[, 2], sep="_"))
# make sure the first element of scNetworks is from CSN
scNetworks <- list(CSN, CeSpGRN.weights, LIONESS.weights, kScReNI.weights, wScReNI.weights)
names(scNetworks) <- c('CSN', 'CeSpGRN', 'LIONESS', 'kScReNI', 'wScReNI')
# Set the parameters
top_number = c(1000, seq(2000, 10000, by=2000), 20000)
# Calculate the precision and recall
# Calculate the precision and recall
scNetworks_precision_recall <- Calculate_scNetwork_precision_recall(scNetworks, TF_target_pair, top_number)
scNetworks_precision_recall_noCSN <- scNetworks_precision_recall[-1]
scNetworks_precision_recall_top <- Calculate_scNetwork_precision_recall_top(scNetworks_precision_recall_noCSN, top_number)
# Plot the precision and recall
scNetworks_precision_recall <- read.csv('data/mmRetina_RPCMG_Cell100.500_scNetwork_precision_recall.csv', row.names = 1)
colors = c("#5A5A5A", "#EF9951", "#A9A9A9", "#67C1E3", rgb(253/255,209/255,176/255))
p1 <- ggplot(scNetworks_precision_recall, aes(x=scNetwork_type, y=precision, color=scNetwork_type)) +
geom_boxplot() + scale_color_manual(values=colors)+
theme_classic() + theme(axis.text=element_text(size=14), axis.title=element_text(size=16))
p2 <- ggplot(scNetworks_precision_recall, aes(x=scNetwork_type, y=recall, color=scNetwork_type)) +
geom_boxplot() + scale_color_manual(values=colors)+
theme_classic() + theme(axis.text=element_text(size=14), axis.title=element_text(size=16))
ggarrange(p1, p2, labels = c("A", "B"), ncol = 2, nrow = 1)
Step 4: perform cell clustering based on single cell-specific networks
For the purpose of network-based cell clustering, we calculated gene degrees derived from cell-specific networks. To evaluate the influence of the number of pairwise relationships between TFs and target genes, we applied diverse thresholds to the ranked weights. Specifically, we select the top 500 gene regulation pairs. To quantitatively measure the similarity between the resulting clustering and the known ground truth of cell types, we employ the adjusted rand index (ARI). This metric is implemented in the ‘randIndex’ function from the R package ‘flexclust’, providing a robust statistical measure for evaluating the accuracy of our clustering method.
# Step 4.1: Calculate the degrees and Adjusted Rand Index (ARI) according to single cell-specific networks
# ntype = 4 for mmRetina_RPCMG
ncell = 400; ntype = 4; top = rep(500, ncell)
celltype <- read.csv(paste0('data/', data.name, "_Cell", cell.num, "_annotation.csv"))
sub.celltype <- celltype$undup.cell.types
scNetworks_degree <- calculate_scNetwork_degree(scNetworks, top, sub.celltype, ntype, column_name='sub.celltype')
# Step 4.2: Plot cell clustering based on the networks
load('data/scNetworks_degree.rdata')
col = list(Cell_type = c("MG" = "red", "RPCs_S1" = "green", "RPCs_S2" = "blue", "RPCs_S3" = "purple"))
Network_types <- names(scNetworks_degree)
for(i in 1:length(scNetworks_degree)){
print(Network_types[i])
degree <- scNetworks_degree[[i]]
outdegree <- degree[['outdegree']]
out.degree <- degree[['out.degree.umap']]
# plot the heatmap based on the similarity of the degree matrix
column_ha = HeatmapAnnotation("Cell_type" = sub.celltype, col = col)
print(Heatmap(cor(log(outdegree+1)),
top_annotation = column_ha,
show_row_names = F,
show_column_names = F))
out.degree[['lab']] <- sub.celltype
print(DimPlot(out.degree, reduction = "umap", group.by = "lab", label = F))
}
## [1] "CSN"
## [1] "LIONESS"
## [1] "kScReNI"
## [1] "CeSpGRN"
## [1] "wScReNI"
# Step 4.3: Save ARI for each single-cell network inference method
ARI <- sapply(scNetworks_degree, function(network) {
c(network[["out.degree.umap.ARI"]], network[["out.degree.hclust.ARI"]])
})
rownames(ARI) <- c( "degree.umap.ARI", "degree.hclust.ARI")
ARI
## CSN LIONESS kScReNI CeSpGRN wScReNI
## degree.umap.ARI 0.4879068 0.0000000000 0.5169255 0.5127162 0.6420433
## degree.hclust.ARI 0.2256772 0.0002397088 0.3881163 0.2384845 0.4807174
Step 5: identify regulators enriched in each single-cell regulatory network
We used Monocle3 to investigate pseudotime trajectories in scRNA-seq data of mouse retinal development. Cells were clustered via UMAP reduction, and the principal graph was determined using the learn_graph function. Any RPC-I cell was set as the root when running order_cells. These data were then added back to Seurat as a metadata column for further analysis.
# Step 5.1: perform trajectory analysis to calculate pseudotime
DefaultAssay(coembed) <- "RNA"
load('data/gene.use.rdata')
seurat_object <- coembed[gene.use,]
expression_matrix0 <- as.matrix(seurat_object@assays$RNA@counts)
seurat.obj <- seurat_object[,Matrix::colSums(expression_matrix0) != 0]
expression_matrix <- expression_matrix0[,Matrix::colSums(expression_matrix0) != 0]
cell_metadata <- seurat_object@meta.data[Matrix::colSums(expression_matrix0) != 0,]
gene_annotation <- data.frame(gene_short_name=rownames(expression_matrix))
rownames(gene_annotation) <- rownames(expression_matrix)
# monocle3
cds <- new_cell_data_set(expression_matrix,
cell_metadata = cell_metadata,
gene_metadata = gene_annotation)
# NormalizeData+ScaleData+RunPCA
cds <- preprocess_cds(cds, num_dim = 30)
cds <- reduce_dimension(cds, preprocess_method = "PCA")
# umap
cds.embed <- cds@int_colData$reducedDims$UMAP
int.embed <- Embeddings(seurat.obj, reduction = "umap")
int.embed <- int.embed[rownames(cds.embed),]
cds@int_colData$reducedDims$UMAP <- int.embed
cds@clusters$UMAP$clusters <- seurat.obj@meta.data[rownames(colData(cds)), "seurat_clusters"]
cds@clusters$UMAP$partitions <- factor(x = rep(1, length(rownames(colData(cds)))), levels = 1)
names(cds@clusters$UMAP$partitions) <- rownames(colData(cds))
cds <- estimate_size_factors(cds)
cds@rowRanges@elementMetadata@listData$gene_short_name <- rownames(cds)
rownames(cds@principal_graph_aux[["UMAP"]]$dp_mst) <- NULL
colnames(cds@int_colData@listData$reducedDims@listData$UMAP) <- NULL
cds <- learn_graph(cds,learn_graph_control=list("euclidean_distance_ratio"=5, "geodesic_distance_ratio"=2, "minimal_branch_len"=1000))
load('data/cds_learn_graph.rdata')
plot_cells(cds,
color_cells_by = "seurat_clusters",
label_groups_by_cluster = FALSE,
label_leaves = TRUE,
label_branch_points = TRUE) +
theme(panel.border = element_rect(fill=NA, color="black", size=1, linetype="solid"))
cds <- order_cells(cds)
load('data/cds_order_cells.rdata')
plot_cells(cds,
color_cells_by = "pseudotime",
label_cell_groups = F,
label_leaves = F,
label_branch_points = F,
graph_label_size = 1.5) +
theme(panel.border = element_rect(fill=NA, color="black", size=1, linetype="solid"))
# Add pseudotime to the Seurat object
pseudotime <- pseudotime(cds, reduction_method = 'UMAP')
load('data/seurat.obj.rdata')
pseudotime <- pseudotime[rownames(seurat.obj@meta.data)]
seurat.obj$Pseudotime <- pseudotime
FeaturePlot(seurat.obj, reduction = "umap", features = "Pseudotime")
Considering the sparsity of scRNA-seq data, we employed smoothed
expression profiles for gene co-expression analysis. Pseudotime was
divided into 50 equal intervals. Highly variable genes and expressed
transcription factors were grouped into distinct modules using K-means
clustering on the smoothed profiles. For each module, we conducted
functional enrichment analysis using ClusterProfiler.
print("Step 5.2: smooth the expression of highly variable genes according to pseudotime")
# Get expression profiles ordered by pseudotime
seurat_with_time = seurat.obj
expression_profile <- IReNA::get_SmoothByBin_PseudotimeExp(seurat_with_time, Bin = 50)
print("Step 5.3: modularized highly variable genes through k-means of the smoothed expression")
# Filter noise and logFC in expression profile
expression_profile_filter <- IReNA::filter_expression_profile(expression_profile, FC=0.01)
# K-means clustering
clustering <- IReNA::clustering_Kmeans(expression_profile_filter, K1=10)
clustering <- read.table('data/clustering_revised.txt', head=T)
col = c(colors <- c(rgb(174/255,199/255,232/255), rgb(103/255,193/255,227/255), rgb(91/255,166/255,218/255), rgb(0/255,114/255,189/255), rgb(253/255,209/255,176/255), rgb(239/255,153/255,81/255), rgb(229/255,97/255,69/255),
rgb(175/255,180/255,219/255)))
plot_kmeans_pheatmap(clustering,
ModuleColor1 = c(colors <- col))
Kmeans_clustering_ENS <- add_ENSID(clustering, Spec1='Mm')
# KEGG
library(org.Mm.eg.db)
keytypes(org.Mm.eg.db)
## [1] "ACCNUM" "ALIAS" "ENSEMBL" "ENSEMBLPROT" "ENSEMBLTRANS"
## [6] "ENTREZID" "ENZYME" "EVIDENCE" "EVIDENCEALL" "GENENAME"
## [11] "GENETYPE" "GO" "GOALL" "IPI" "MGI"
## [16] "ONTOLOGY" "ONTOLOGYALL" "PATH" "PFAM" "PMID"
## [21] "PROSITE" "REFSEQ" "SYMBOL" "UNIPROT"
library(KEGG.db)
enrichment_KEGG <- enrich_module(Kmeans_clustering_ENS, org.Mm.eg.db, enrich.db = 'KEGG', organism = 'mmu')
head(enrichment_KEGG)
## ID Description module -log10(q-value) GeneRatio
## mmu04115 mmu04115 p53 signaling pathway 1 1.9278276 7/96
## mmu04310 mmu04310 Wnt signaling pathway 1 1.5225748 9/96
## mmu04110 mmu04110 Cell cycle 1 0.8189731 7/96
## mmu00790 mmu00790 Folate biosynthesis 1 0.5398458 2/96
## mmu03022 mmu03022 Basal transcription factors 1 0.5304544 3/96
## mmu03010 mmu03010 Ribosome 2 19.6434909 32/176
## BgRatio pvalue p.adjust qvalue
## mmu04115 76/6577 1.089077e-04 1.274221e-02 1.180789e-02
## mmu04310 162/6577 5.537855e-04 3.239645e-02 3.002100e-02
## mmu04110 140/6577 4.197923e-03 1.637190e-01 1.517144e-01
## mmu00790 11/6577 1.064389e-02 3.113339e-01 2.885056e-01
## mmu03022 36/6577 1.519068e-02 3.181397e-01 2.948123e-01
## mmu03010 133/6577 1.713413e-22 2.364510e-20 2.272527e-20
## geneID
## mmu04115 12444/12447/170770/12445/12122/75747/67874
## mmu04310 12444/20319/17869/57265/12445/72293/13380/18021/12301
## mmu04110 12444/12447/12577/17869/12445/218294/13557
## mmu00790 110391/20751
## mmu03022 68776/319944/237336
## mmu03010 57808/20091/16898/110954/68028/20042/19988/20055/27367/75617/20102/67891/19896/78294/68193/66475/19933/19989/27207/20068/20085/56040/269261/19951/20116/100040298/267019/27176/20104/114641/67941/11837
## Count
## mmu04115 7
## mmu04310 9
## mmu04110 7
## mmu00790 2
## mmu03022 3
## mmu03010 32
enrichment_GO <- enrich_module(Kmeans_clustering_ENS, org.Mm.eg.db, 'GO', organism = 'mmu')
head(enrichment_GO)
## ID Description module
## GO:0060537 GO:0060537 muscle tissue development 1
## GO:0042744 GO:0042744 hydrogen peroxide catabolic process 1
## GO:0050678 GO:0050678 regulation of epithelial cell proliferation 1
## GO:0030900 GO:0030900 forebrain development 1
## GO:0007517 GO:0007517 muscle organ development 1
## GO:0002181 GO:0002181 cytoplasmic translation 2
## -log10(q-value) GeneRatio BgRatio pvalue p.adjust
## GO:0060537 4.870024 22/284 494/28943 4.732070e-09 1.688876e-05
## GO:0042744 4.710916 7/284 30/28943 1.365178e-08 2.436160e-05
## GO:0050678 4.044400 19/284 442/28943 9.501588e-08 1.130372e-04
## GO:0030900 3.315658 17/284 407/28943 6.783868e-07 6.052906e-04
## GO:0007517 3.263687 16/284 371/28943 9.557803e-07 6.822360e-04
## GO:0002181 25.439964 35/507 146/28943 1.111316e-29 4.539726e-26
## qvalue
## GO:0060537 1.348889e-05
## GO:0042744 1.945738e-05
## GO:0050678 9.028176e-05
## GO:0030900 4.834398e-04
## GO:0007517 5.448954e-04
## GO:0002181 3.631079e-26
## geneID
## GO:0060537 12444/108655/114142/57810/101401/16002/14609/17898/11819/17869/67040/22003/56449/228019/16011/13380/21388/14725/18021/14775/12301/75547
## GO:0042744 15135/15126/15132/15122/69675/15129/14775
## GO:0050678 15364/12444/20319/108655/67866/114142/12577/19272/12822/16002/14609/11819/17869/18102/226841/20449/12122/14775/12034
## GO:0030900 15364/114142/13865/57810/22771/14013/18423/11819/18846/140486/15228/13380/14725/18212/20563/14168/56207
## GO:0007517 108655/114142/57810/16002/14609/11819/17869/57265/67040/28146/22003/56449/228019/14725/18021/14775
## GO:0002181 57808/20091/18458/16898/68028/20042/19988/20055/27367/234734/75617/20102/98221/67891/56403/19896/78294/13685/13629/68193/66475/19933/19989/27207/20068/20085/56040/269261/19951/20116/267019/27176/20104/114641/11837
## Count
## GO:0060537 22
## GO:0042744 7
## GO:0050678 19
## GO:0030900 17
## GO:0007517 16
## GO:0002181 35
Mm_func = enrichment_GO[order(enrichment_GO$module,decreasing = F),c(2,3,9)]
Mm_func$score = -log10(Mm_func$qvalue)
Mm_func$rank = 1:nrow(Mm_func)
Mm_func$module = factor(Mm_func$module)
Mm_col = c(colors <- c(rgb(174/255,199/255,232/255), rgb(103/255,193/255,227/255), rgb(91/255,166/255,218/255), rgb(0/255,114/255,189/255), rgb(253/255,209/255,176/255), rgb(239/255,153/255,81/255), rgb(229/255,97/255,69/255),
rgb(175/255,180/255,219/255)))
ggplot2::ggplot(Mm_func,aes(x=reorder(Description,rank),y=score))+
geom_bar(position="dodge",stat="identity",aes(fill = module))+
theme_classic()+ theme(panel.grid=element_blank(),text=element_text(size=15,face = "bold"))+
xlab(NULL)+coord_flip() + ylab('-log10(qvalue)')+scale_fill_discrete(name = "module")+
scale_fill_manual(values=Mm_col)